CALL LIBRARY

OPEN AND READ FILE

# Let the user pick the CSV through the file-chooser dialog and load it
# into `df`.
df <- read.csv(file = file.choose())

# Show the first six rows (the default for head()) as a quick sanity check.
head(x = df, n = 6L)

CHANGE CHARACTER TO FACTOR

# Convert every character column of `df` to a factor and leave all other
# columns untouched. Assigning into `df[]` replaces the columns in place
# while keeping `df` a data frame with its original column names, and the
# lapply() form avoids leaving a stray loop index in the workspace.
df[] <- lapply(df, function(column) {
  if (is.character(column)) as.factor(column) else column
})

# Inspect the resulting column types.
str(df)
## 'data.frame':    149 obs. of  20 variables:
##  $ Country.name                              : Factor w/ 149 levels "Afghanistan",..: 41 34 129 55 97 104 128 79 98 7 ...
##  $ Regional.indicator                        : Factor w/ 10 levels "Central and Eastern Europe",..: 10 10 10 10 10 10 10 10 6 10 ...
##  $ Ladder.score                              : num  7.84 7.62 7.57 7.55 7.46 ...
##  $ Standard.error.of.ladder.score            : num  0.032 0.035 0.036 0.059 0.027 0.035 0.036 0.037 0.04 0.036 ...
##  $ upperwhisker                              : num  7.9 7.69 7.64 7.67 7.52 ...
##  $ lowerwhisker                              : num  7.78 7.55 7.5 7.44 7.41 ...
##  $ Logged.GDP.per.capita                     : num  10.8 10.9 11.1 10.9 10.9 ...
##  $ Social.support                            : num  0.954 0.954 0.942 0.983 0.942 0.954 0.934 0.908 0.948 0.934 ...
##  $ Healthy.life.expectancy                   : num  72 72.7 74.4 73 72.4 73.3 72.7 72.6 73.4 73.3 ...
##  $ Freedom.to.make.life.choices              : num  0.949 0.946 0.919 0.955 0.913 0.96 0.945 0.907 0.929 0.908 ...
##  $ Generosity                                : num  -0.098 0.03 0.025 0.16 0.175 0.093 0.086 -0.034 0.134 0.042 ...
##  $ Perceptions.of.corruption                 : num  0.186 0.179 0.292 0.673 0.338 0.27 0.237 0.386 0.242 0.481 ...
##  $ Ladder.score.in.Dystopia                  : num  2.43 2.43 2.43 2.43 2.43 2.43 2.43 2.43 2.43 2.43 ...
##  $ Explained.by..Log.GDP.per.capita          : num  1.45 1.5 1.57 1.48 1.5 ...
##  $ Explained.by..Social.support              : num  1.11 1.11 1.08 1.17 1.08 ...
##  $ Explained.by..Healthy.life.expectancy     : num  0.741 0.763 0.816 0.772 0.753 0.782 0.763 0.76 0.785 0.782 ...
##  $ Explained.by..Freedom.to.make.life.choices: num  0.691 0.686 0.653 0.698 0.647 0.703 0.685 0.639 0.665 0.64 ...
##  $ Explained.by..Generosity                  : num  0.124 0.208 0.204 0.293 0.302 0.249 0.244 0.166 0.276 0.215 ...
##  $ Explained.by..Perceptions.of.corruption   : num  0.481 0.485 0.413 0.17 0.384 0.427 0.448 0.353 0.445 0.292 ...
##  $ Dystopia...residual                       : num  3.25 2.87 2.84 2.97 2.8 ...

DATA PREPROCESSING

CHECK IF THERE’S NA OR NOT

# Count the missing cells across the whole data frame.
df %>%
  is.na() %>%
  sum()
## [1] 0

REMOVE DUPLICATE

# Keep only one copy of each fully identical row.
df <- distinct(df)

RENAME COLUMN

# Give four columns shorter names (new name = old name); column positions
# are unchanged.
df <- df %>%
  rename(
    Happiness.score = Ladder.score,
    Standard.error.of.happiness = Standard.error.of.ladder.score,
    Life.expectancy = Healthy.life.expectancy,
    Freedom.of.choices = Freedom.to.make.life.choices
  )

# List the column names to confirm the renames.
names(df)
##  [1] "Country.name"                              
##  [2] "Regional.indicator"                        
##  [3] "Happiness.score"                           
##  [4] "Standard.error.of.happiness"               
##  [5] "upperwhisker"                              
##  [6] "lowerwhisker"                              
##  [7] "Logged.GDP.per.capita"                     
##  [8] "Social.support"                            
##  [9] "Life.expectancy"                           
## [10] "Freedom.of.choices"                        
## [11] "Generosity"                                
## [12] "Perceptions.of.corruption"                 
## [13] "Ladder.score.in.Dystopia"                  
## [14] "Explained.by..Log.GDP.per.capita"          
## [15] "Explained.by..Social.support"              
## [16] "Explained.by..Healthy.life.expectancy"     
## [17] "Explained.by..Freedom.to.make.life.choices"
## [18] "Explained.by..Generosity"                  
## [19] "Explained.by..Perceptions.of.corruption"   
## [20] "Dystopia...residual"

DELETE UNUSED COLUMNS

# Drop the columns that the rest of the analysis does not use.
df <- select(
  df,
  -c(
    "Ladder.score.in.Dystopia",
    "Dystopia...residual",
    "Explained.by..Log.GDP.per.capita",
    "Explained.by..Healthy.life.expectancy",
    "Explained.by..Generosity",
    "Explained.by..Social.support",
    "Explained.by..Freedom.to.make.life.choices",
    "Explained.by..Perceptions.of.corruption",
    "upperwhisker",
    "lowerwhisker",
    "Standard.error.of.happiness"
  )
)

# List the remaining column names.
names(df)
## [1] "Country.name"              "Regional.indicator"       
## [3] "Happiness.score"           "Logged.GDP.per.capita"    
## [5] "Social.support"            "Life.expectancy"          
## [7] "Freedom.of.choices"        "Generosity"               
## [9] "Perceptions.of.corruption"

MAKE NEW DF FOR ASEAN ONLY

# ASEAN member states used to subset the data by Country.name.
asean <- c(
  "Indonesia",
  "Malaysia",
  "Philippines",
  "Singapore",
  "Thailand",
  "Brunei Darussalam",
  "Vietnam",
  "Laos",
  "Myanmar",
  "Cambodia"
)

# Keep only the rows whose country is in the ASEAN list.
df_asean <- filter(df, Country.name %in% asean)

df_asean

MAKE PERCENTAGE FOR CERTAIN COLUMNS

# Copy of `df` in which the four fraction-valued indicator columns are
# multiplied by 100 so they read as percentages; all other columns are
# carried over unchanged.
df_percentage <- mutate(
  df,
  across(
    c(
      Social.support,
      Freedom.of.choices,
      Generosity,
      Perceptions.of.corruption
    ),
    ~ .x * 100
  )
)

df_percentage

HEATMAP

# Correlation heatmap of every numeric column in `df_percentage`.

# Keep only the numeric columns (select(where()) replaces the superseded
# select_if()).
num_data <- df_percentage %>%
  select(where(is.numeric))

# Pairwise correlations (cor()'s default method), rounded to two decimals
# so the tile labels stay short.
corr_matrix <- round(cor(num_data), 2)

# Reshape the square matrix to long form; the plot below reads the
# resulting Var1 / Var2 / value columns.
melted_corr_matrix <- melt(corr_matrix)
head(melted_corr_matrix)

# One tile per variable pair, filled by correlation and labelled with its
# value. The `text` aesthetic is not drawn by ggplot; it only feeds the
# plotly tooltip requested in ggplotly() below.
# NOTE(review): `low` and `high` are both shades of green, so the fill alone
# does not distinguish negative from positive correlations -- the sign is
# only visible in the tile labels. Confirm this is intended.
heatmap <- ggplot(
  data = melted_corr_matrix,
  aes(
    x = Var1,
    y = Var2,
    fill = value,
    text = paste(Var1, "and", Var2, "<br>Correlation :", value)
  )
) +
  geom_tile() +
  geom_text(aes(label = value), color = "black", size = 4) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Correlation Heatmap", x = "", y = "") +
  scale_fill_gradient2(
    low = "#56AD6A",
    high = "#269B47",
    mid = "white",
    midpoint = 0,
    # Spelled out in full: `limit` only worked through partial matching.
    limits = c(-1, 1),
    space = "Lab",
    name = "Correlation"
  )

# Interactive version showing only the custom tooltip text.
ggplotly(heatmap, tooltip = "text")

Based on this heatmap, if we focus on Happiness Score, we can see that GDP per Capita, Social Support, Life Expectancy, and Freedom of Choices are positively correlated with Happiness Score, whereas Perceptions of Corruption is negatively correlated with it. Generosity shows essentially no correlation with Happiness Score, because its coefficient is close to 0 (0.02). The strongest correlation with Happiness Score is GDP per Capita, with a coefficient of 0.79.

BAR CHART OF HAPPINESS SCORE IN ASEAN

# Bar chart of the happiness score for each ASEAN country in `df_asean`.
# theme_minimal() is a complete theme and replaces any theme settings added
# before it, so it is applied first and the label rotation is layered on
# top. The rotation targets `axis.text.x` (the tick labels); `axis.line.x`
# is a line element and cannot take element_text().
bar_chart <- ggplot(df_asean, aes(x = Country.name, y = Happiness.score)) +
  geom_col(fill = "#97CBA1") +
  labs(
    title = "Happiness Score of ASEAN Countries in 2021",
    x = "Country",
    y = "Happiness Score"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Render as an interactive plotly widget.
ggplotly(bar_chart)

In this bar chart, we can see that Singapore has the highest happiness score in ASEAN, with a value of 6.377. By contrast, the country with the lowest happiness score in ASEAN is Myanmar, with a value of 4.426.

LINEAR REGRESSION OF HAPPINESS SCORE AND SOCIAL SUPPORT

# Scatter plot of Social.support against Happiness.score with a straight
# least-squares trend line and no confidence band.
lg_1 <- ggplot(
  data = df_percentage,
  mapping = aes(x = Happiness.score, y = Social.support)
) +
  geom_point() +
  geom_smooth(method = "lm", colour = "#269B47", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Happiness Score vs Social Support",
    x = "Happiness Score",
    y = "Social Support"
  )

# Render as an interactive plotly widget.
ggplotly(p = lg_1)
## `geom_smooth()` using formula = 'y ~ x'

From this linear regression, we can see that Happiness Score and Social Support have a positive correlation. This means that the higher the Social Support, the higher the Happiness Score in that country tends to be.

LINEAR REGRESSION OF HAPPINESS SCORE AND GDP PER CAPITA

# Scatter plot of Logged.GDP.per.capita against Happiness.score with a
# straight least-squares trend line and no confidence band.
lg_2 <- ggplot(
  data = df_percentage,
  mapping = aes(x = Happiness.score, y = Logged.GDP.per.capita)
) +
  geom_point() +
  geom_smooth(method = "lm", colour = "#269B47", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Happiness Score vs GDP per Capita",
    x = "Happiness Score",
    y = "GDP per Capita"
  )

# Render as an interactive plotly widget.
ggplotly(p = lg_2)
## `geom_smooth()` using formula = 'y ~ x'

From this linear regression, we can see that Happiness Score and GDP per Capita have a positive correlation. This means that the higher the GDP per Capita, the higher the Happiness Score in that country tends to be.

LINEAR REGRESSION OF HAPPINESS SCORE AND LIFE EXPECTANCY

# Scatter plot of Life.expectancy against Happiness.score with a straight
# least-squares trend line and no confidence band.
lg_3 <- ggplot(
  data = df_percentage,
  mapping = aes(x = Happiness.score, y = Life.expectancy)
) +
  geom_point() +
  geom_smooth(method = "lm", colour = "#269B47", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Happiness Score vs Life Expectancy",
    x = "Happiness Score",
    y = "Life Expectancy"
  )

# Render as an interactive plotly widget.
ggplotly(p = lg_3)
## `geom_smooth()` using formula = 'y ~ x'

From this linear regression, we can see that Happiness Score and Life Expectancy have a positive correlation. This means that the higher the Life Expectancy, the higher the Happiness Score in that country tends to be.

LINEAR REGRESSION OF HAPPINESS SCORE AND FREEDOM TO MAKE LIFE CHOICES

# Scatter plot of Freedom.of.choices against Happiness.score with a straight
# least-squares trend line and no confidence band.
lg_4 <- ggplot(
  data = df_percentage,
  mapping = aes(x = Happiness.score, y = Freedom.of.choices)
) +
  geom_point() +
  geom_smooth(method = "lm", colour = "#269B47", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Happiness Score vs Freedom to Make Life Choices",
    x = "Happiness Score",
    y = "Freedom to Make Life Choices"
  )

# Render as an interactive plotly widget.
ggplotly(p = lg_4)
## `geom_smooth()` using formula = 'y ~ x'

From this linear regression, we can see that Happiness Score and Freedom to Make Life Choices have a positive correlation. This means that the higher the Freedom to Make Life Choices, the higher the Happiness Score in that country tends to be.

LINEAR REGRESSION OF HAPPINESS SCORE AND GENEROSITY

# Scatter plot of Generosity against Happiness.score with a straight
# least-squares trend line and no confidence band.
lg_5 <- ggplot(
  data = df_percentage,
  mapping = aes(x = Happiness.score, y = Generosity)
) +
  geom_point() +
  geom_smooth(method = "lm", colour = "#269B47", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Happiness Score vs Generosity",
    x = "Happiness Score",
    y = "Generosity"
  )

# Render as an interactive plotly widget.
ggplotly(p = lg_5)
## `geom_smooth()` using formula = 'y ~ x'

From this linear regression, we can see that Happiness Score and Generosity are essentially uncorrelated. This suggests that Generosity has little to no linear relationship with Happiness Score.

LINEAR REGRESSION OF HAPPINESS SCORE AND PERCEPTION OF CORRUPTION

# Scatter plot of Perceptions.of.corruption against Happiness.score with a
# straight least-squares trend line and no confidence band.
lg_6 <- ggplot(
  data = df_percentage,
  mapping = aes(x = Happiness.score, y = Perceptions.of.corruption)
) +
  geom_point() +
  geom_smooth(method = "lm", colour = "#269B47", se = FALSE) +
  theme_minimal() +
  labs(
    title = "Happiness Score vs Perception of Corruption",
    x = "Happiness Score",
    y = "Perception of Corruption"
  )

# Render as an interactive plotly widget.
ggplotly(p = lg_6)
## `geom_smooth()` using formula = 'y ~ x'

From this linear regression, we can see that Happiness Score and Perception of Corruption have a negative correlation. This means that the lower the Perception of Corruption, the higher the Happiness Score in that country tends to be.

BOXPLOT OF HAPPINESS SCORE BASED ON REGIONAL

# Distribution of Happiness.score within each Regional.indicator group,
# drawn as one box per region.
box_plot <- ggplot(
  data = df_percentage,
  mapping = aes(x = Regional.indicator, y = Happiness.score)
) +
  geom_boxplot(fill = "#56AD6A") +
  labs(
    title = "Boxplot of Happiness per Regional",
    x = "Regional",
    y = "Happiness Score"
  ) +
  theme_minimal() +
  # Slant the region names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Render as an interactive plotly widget.
ggplotly(p = box_plot)

In this box plot, we can see that Western Europe and North America and ANZ have the highest median happiness scores. In contrast, South Asia has the lowest median happiness score. We can also see that North America and ANZ, as well as Sub-Saharan Africa, exhibit the highest variability in happiness scores, while East Asia and Central and Eastern Europe show the least variability.

INTERACTIVE MAPS HAPPINESS SCORE

# Interactive world choropleth: each country is matched by name and
# coloured by its happiness score; hovering shows the country, region and
# score. The trace and its layout are built in a single pipeline.
maps_happiness <- plot_ly(
  data = df_percentage,
  type = "choropleth",
  locations = ~Country.name,
  locationmode = "country names",
  z = ~Happiness.score,
  text = ~paste(
    Country.name,
    "<br>Region:", Regional.indicator,
    "<br>Happiness Score:", Happiness.score
  ),
  hoverinfo = "text",
  colorscale = "Viridis",
  marker = list(line = list(color = "white", width = 0.5)),
  colorbar = list(title = "Happiness Score")
) %>%
  layout(
    title = "World Happiness Report 2021",
    geo = list(
      showframe = FALSE,
      showcoastlines = FALSE,
      projection = list(type = "equirectangular")
    )
  )

maps_happiness

In this Happiness Score map, we can observe global happiness with countries color-coded: yellow indicates the happiest countries, while deep blue represents the least happy ones. We can see that Finland is the happiest country in the world and Afghanistan is the least happy country in the world.

INTERACTIVE MAPS LIFE EXPECTANCY

# Interactive world choropleth: each country is matched by name and
# coloured by its life expectancy; hovering shows the country, region and
# life expectancy. The trace and its layout are built in a single pipeline.
maps_life <- plot_ly(
  data = df_percentage,
  type = "choropleth",
  locations = ~Country.name,
  locationmode = "country names",
  z = ~Life.expectancy,
  text = ~paste(
    Country.name,
    "<br>Region:", Regional.indicator,
    "<br>Life Expectancy:", Life.expectancy
  ),
  hoverinfo = "text",
  colorscale = "YlGnBu",
  marker = list(line = list(color = "white", width = 0.5)),
  colorbar = list(title = "Life Expectancy")
) %>%
  layout(
    title = "World Life Expectancy Report 2021",
    geo = list(
      showframe = FALSE,
      showcoastlines = FALSE,
      projection = list(type = "equirectangular")
    )
  )

maps_life

In this Life Expectancy map, we can observe global life expectancy with countries color-coded: pastel yellow indicates the countries with the highest life expectancy, while deep blue represents those with the lowest. We can see that Australia has the highest life expectancy in the world and Chad has the lowest life expectancy in the world.